* This file analyzes the Geograin and Minneapolis spot price data.  In addition, it creates
* Maps of oil and rail terminals and the ND railroad network. 

clear all
set mem 4g
set matsize 800

*** Setup file paths
cd "Your paths here"

***** Figure 1 - Wheat prices spreads and oil cars *****
use "Data/MktMNSpreadMonthlyAllSilos.dta", clear
sort year month
collapse (mean) AvgMNspot price spread, by(year month)
gen date = ym(year, month)
format date %tm
tsset date
* Merge in oil cars from Genscape
sort date
merge m:1 date using "Data/PRR_Rail_Daily_Hughes_monthly.dta"
rename num_cars oilcars
replace oilcars = oilcars/1000
* Graph (B&W) version
#delimit ;
twoway (tsline AvgMNspot, scheme(s1mono) lcolor(gs9)) (tsline price, lcolor(gs12)) 
(tsline spread, lpattern(dash) lcolor(gs0)) (tsline oilcars, yaxis(2) lcolor(gs0) 
title("Wheat Prices, Spread and Oil Cars") ytitle("Price ($/bu.)") ytitle("Oil (Thousand Cars)", axis(2))
legend(row(1) label(1 "Hub Spot") label(2 "Silo Price") label(3 "Spread") label(4 "Oil Cars") ));
#delimit cr
graph export "Figures/OilCarsWheatSpotSpreadB&W.pdf", replace

***** Figure 2 - Maps *****
*** Wheat spread maps 
foreach y in 2012 2013 2014 2015 {
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 {
use "Data/MktMNSpreadMonthly`y'm`m'.dta", clear
replace latitude = 45 if marketid == 1
replace longitude = -95 if marketid == 1
geo2xy latitude longitude, proj() replace 
save "Data/MktMNSpreadMonthly`y'm`m'_proj", replace
use "Data/Maps/geo2xy_us_data.dta", clear
keep if STUSPS == "ND" | STUSPS == "SD" | STUSPS == "MT" | STUSPS == "MN" 
#delimit;
spmap using "Data/Maps/States_c_proj", title("`y' m`m'")  fcolor(white) id(_ID)
point(data("Data/MktMNSpreadMonthly`y'm`m'_proj") x(longitude) y(latitude)
by(spread_quint) fcolor(gs12 gs9 gs6 gs3 gs0 white)  size(4 4 4 4 4 5) ); 
#delimit cr
graph export "Data/Maps/WheatSpread`y'm`m'B&W.pdf", replace
}
}
* Version with a legend for combined figure
foreach y in 2013  {
foreach m in 12  {
use "Data/MktMNSpreadMonthly`y'm`m'.dta", clear
drop if spread_quint == 6
replace latitude = 45 if marketid == 1
replace longitude = -95 if marketid == 1
geo2xy latitude longitude, proj() replace 
save "Data/MktMNSpreadMonthly`y'm`m'_proj", replace
use "Data/Maps/geo2xy_us_data.dta", clear
keep if STUSPS == "ND" | STUSPS == "SD" | STUSPS == "MT" | STUSPS == "MN" 
#delimit;
spmap using "Data/Maps/States_c_proj", legend(size(huge) row(1) title("Spread Quintile", position(9) size(huge) ) )  
title("`y' m`m'")  fcolor(white) id(_ID)
point(data("Data/MktMNSpreadMonthly`y'm`m'_proj") x(longitude) y(latitude) 
by(spread_quint) fcolor(gs12 gs9 gs6 gs3 gs0 white) size(4 4 4 4 4 4) 
legenda(on) 
); 
#delimit cr
graph export "Data/Maps/WheatSpread`y'm`m'B&W_legend.pdf", replace
}
}

***** Figure 3 - Railcar auctions ***** 
clear matrix
drop _all
set more off

use "/RailcarAuctions_sta13.dta", clear
	// split the string variable into parts
	split weekending,g(part) p("/")
	// add leading zero
	forvalues j = 1/2 {
		replace part`j' = "0" + part`j' if length(part`j') < 2
	}

	// now put it back together
	gen tmp = part1 + part2 + part3
	// transfer to date
	gen date = date(tmp, "MDY")
	form date %td
	drop tmp part* weekending

replace company="BNSF" if company=="BNSF-GF"
replace company="UP" if company=="UP-Pool"
rename shuttle sh
rename nonshuttle ns
gen cym=company+string(100*bidyear+monthnumber)
drop bidyear monthnumber bidmonth company forsearch
reshape wide ns sh , i(date) j(cym) string

gen nsBNSF=.
gen shBNSF=.
gen nsUP=.
gen shUP=.

forvalues yr=1998(1)2016 {
	foreach mo in 01 02 03 04 05 06 07 08 09 10 11 12  {
		local mon=`mo'
		gen deldate=mdy(`mon',1,`yr')
		format deldate %td
		replace nsBNSF = nsBNSF`yr'`mo' if nsBNSF==. & deldate-date<61
		replace shBNSF = shBNSF`yr'`mo' if shBNSF==. & deldate-date<61
		replace nsUP = nsUP`yr'`mo' if nsUP==. & deldate-date<61
		replace shUP = shUP`yr'`mo' if shUP==. & deldate-date<61
		drop deldate
	}
}
* Graph with spreads
gen week = week(date)
gen year = year(date)
sort year week
collapse (mean) ns* sh*, by(year week)
gen date = yw(year, week)
format date %tw
merge m:1  year week using "Data/MktMNSpreadWeeklyAllSilosAVG.dta",
replace nsBNSF = nsBNSF/3500
replace shBNSF = shBNSF/3500
replace nsUP = nsUP/3500
replace shUP = shUP/3500
* BNSF
#delimit;
twoway (line nsBNSF date if year>2009 & year < 2016, lcolor(gs0)) (line shBNSF date if year>2009 & year < 2016, lcolor(gs6)) 
(line spread date if year>2009 & year < 2016, lcolor(gs12) lpattern(-)) , scheme(s1mono) ytitle("Price ($/bu.)") title("BNSF-GF (Nearby)") 
legend(row(1) label(1 "Non-Shuttle") label(2 "Shuttle") label(3 "Mean Spread"));
#delimit cr
graph export "Figures/CarsSpreadCombined_BNSF.pdf", as(pdf) replace

***** Figure 4 - Timing of shipments/delays
*** Create figures for cumulative grain shipments
* Wheat - Analysis of waybill data 
use "Data/WheatinCropTime.dta", clear
drop if cropyear == 2009
drop if cropyear == 2014
egen state_id = group(O_ST)
keep if O_ST == "ND"
sort O_ST cropyear m
collapse (sum) U_Cars, by(O_ST cropyear m)
sort O_ST cropyear m 
by O_ST cropyear: egen AnnualTot = sum(U_Cars)
gen TotCars = .
* 2010
replace TotCars = U_Cars in 1
forvalues j = 2(1)12 {
replace TotCars = U_Cars + TotCars[_n-1] in `j'
}
* 2011 
replace TotCars = U_Cars in 13
forvalues j = 14(1)25 {
replace TotCars = U_Cars + TotCars[_n-1] in `j'
}
* 2012 
replace TotCars = U_Cars in 26
forvalues j = 27(1)38 {
replace TotCars = U_Cars + TotCars[_n-1] in `j'
}
* Average for years excluding 2013 and 2014
sort m
by m: egen AvgTotCars = mean(TotCars)
* 2013 
sort O_ST cropyear m
replace TotCars = U_Cars in 39
forvalues j = 40(1)51 {
replace TotCars = U_Cars + TotCars[_n-1] in `j'
}
replace TotCars = TotCars/1000
replace AvgTotCars = AvgTotCars/1000
* Graph
#delimit ;
twoway (scatter AvgTotCars m if cropyear == 2010, connect(l) msymbol(i) scheme(s1mono)) 
(scatter TotCars m  if cropyear == 2013, connect(l) msymbol(i) lcolor(gs9)
legend(row(1) label( 1 "2010-2012 Avg.") label(2 "2013"))
title("ND Cumulative Wheat Shipments") xtitle("Month of Crop Year") ytitle("Wheat Shipments (Thousand Cars)"));
#delimit cr
graph export "Figures/WheatCumulativeAbs.pdf", replace
* Corn - Analysis of waybill data 
use "Data/CorninCropTime.dta", clear
drop if cropyear == 2009
drop if cropyear == 2014
egen state_id = group(O_ST)
keep if O_ST == "ND"
sort O_ST cropyear m
collapse (sum) U_Cars, by(O_ST cropyear m)
sort O_ST cropyear m 
by O_ST cropyear: egen AnnualTot = sum(U_Cars)
gen TotCars = .
* 2010
replace TotCars = U_Cars in 1
forvalues j = 2(1)11 {
replace TotCars = U_Cars + TotCars[_n-1] in `j'
}
* 2011 
replace TotCars = U_Cars in 12
forvalues j = 13(1)22 {
replace TotCars = U_Cars + TotCars[_n-1] in `j'
}
* 2012 
replace TotCars = U_Cars in 23
forvalues j = 24(1)33 {
replace TotCars = U_Cars + TotCars[_n-1] in `j'
}
* Average for years excluding 2013 
sort m
by m: egen AvgTotCars = mean(TotCars)
* 2013 
sort O_ST cropyear m
replace TotCars = U_Cars in 34
forvalues j = 35(1)44 {
replace TotCars = U_Cars + TotCars[_n-1] in `j'
}
replace TotCars = TotCars/1000
replace AvgTotCars = AvgTotCars/1000
* Graph
#delimit ;
twoway (scatter AvgTotCars m if cropyear == 2010, connect(l) msymbol(i) scheme(s1mono)) 
(scatter TotCars m  if cropyear == 2013, connect(l) msymbol(i) lcolor(gs9)
legend(row(1) label( 1 "2010-2012 Avg.") label(2 "2013"))
title("ND Cumulative Corn Shipments") xtitle("Month of Crop Year") ytitle("Corn Shipments (Thousand Cars)"));
#delimit cr
graph export "Figures/CornCumulativeAbs.pdf", replace
* Soy - Analysis of waybill data 
use "Data/SoyinCropTime.dta", clear
drop if cropyear == 2009
drop if cropyear == 2014
egen state_id = group(O_ST)
keep if O_ST == "ND"
sort O_ST cropyear m
collapse (sum) U_Cars, by(O_ST cropyear m)
sort O_ST cropyear m 
by O_ST cropyear: egen AnnualTot = sum(U_Cars)
gen TotCars = .
* 2010
replace TotCars = U_Cars in 1
forvalues j = 2(1)12 {
replace TotCars = U_Cars + TotCars[_n-1] in `j'
}
* 2011 
replace TotCars = U_Cars in 13
forvalues j = 14(1)23 {
replace TotCars = U_Cars + TotCars[_n-1] in `j'
}
* 2012 
replace TotCars = U_Cars in 24
forvalues j = 25(1)34 {
replace TotCars = U_Cars + TotCars[_n-1] in `j'
}
* Average for years excluding 2013 
sort m
by m: egen AvgTotCars = mean(TotCars)
* 2013 
sort O_ST cropyear m
replace TotCars = U_Cars in 35
forvalues j = 36(1)45 {
replace TotCars = U_Cars + TotCars[_n-1] in `j'
}
replace TotCars = TotCars/1000
replace AvgTotCars = AvgTotCars/1000
drop if m == 12
* Graph
#delimit ;
twoway (scatter AvgTotCars m if cropyear == 2010, connect(l) msymbol(i) scheme(s1mono)) 
(scatter TotCars m  if cropyear == 2013, connect(l) msymbol(i) lcolor(gs9)
legend(row(1) label( 1 "2010-2012 Avg.") label(2 "2013"))
title("ND Cumulative Soy Shipments") xtitle("Month of Crop Year") ytitle("Soy Shipments (Thousand Cars)"));
#delimit cr
graph export "Figures/SoyCumulativeAbs.pdf", replace

***** Table 1 - Summary statistics
*** Table of summary statistics
use "Data/MktMNSpreadMonthlyAllSilos.dta", clear
* Keep ND, SD, MT, MN but drop OR since it's an export terminal
drop if state == "OR"
tab state
format date %tm
sort date
merge m:1 date using "Data/PRR_Rail_Daily_Hughes_monthly.dta", 
* Keep years for which we have Genscape and Geograin data
drop if year < 2012
drop if year > 2015
* Check everything matched in merge
tab _merge
drop _merge
order marketid year month price bid_h bid_l AvgMNspot spread num_cars
* Merge in Diesel price
sort date
merge m:1 date using "Data/Diesel_clean.dta", 
keep if _merge == 3
drop _merge
* Convert cars to 1000s
rename num_cars oilcars
replace oilcars = oilcars/1000
* Convert distance to miles
replace MNdist = MNdist/100
sort year
preserve
* Price and Spread
#delimit ;
collapse 
(mean) price spread oilcars Pdiesel
(count) price_n=price spread_n=spread oilcars_n=oilcars Pdiesel_n=Pdiesel
(min) price_min=price spread_min=spread oilcars_min=oilcars Pdiesel_min=Pdiesel
(p25) price_p25=price spread_p25=spread oilcars_p25=oilcars Pdiesel_p25=Pdiesel
(p50) price_p50=price spread_p50=spread oilcars_p50=oilcars Pdiesel_p50=Pdiesel
(p75) price_p75=price spread_p75=spread oilcars_p75=oilcars Pdiesel_p75=Pdiesel
(max) price_max=price spread_pmax=spread oilcars_pmax=oilcars Pdiesel_pmax=Pdiesel, 
by(year);
#delimit cr
xpose, clear varname
order _varname
outsheet using  "Tables/SumStats.csv", delimiter(,) replace
restore
preserve
* Oil Cars 
keep year month oilcars 
duplicates drop
#delimit ;
collapse 
(mean) oilcars
(count) oilcars_n=oilcars 
(min) oilcars_min=oilcars
(p25) oilcars_p25=oilcars
(p50) oilcars_p50=oilcars
(p75) oilcars_p75=oilcars
(max) oilcars_pmax=oilcars, 
by(year);
#delimit cr
xpose, clear varname
order _varname
outsheet using  "Tables/SumStatsOilCars.csv", delimiter(,) replace
* Summary stats on "All" variables
restore
*** Create variable for crop years
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 6
replace cropyear = 2010 if year == 2010 & month >= 7
replace cropyear = 2010 if year == 2011 & month <= 6
replace cropyear = 2011 if year == 2011 & month >= 7
replace cropyear = 2011 if year == 2012 & month <= 6
replace cropyear = 2012 if year == 2012 & month >= 7
replace cropyear = 2012 if year == 2013 & month <= 6
replace cropyear = 2013 if year == 2013 & month >= 7
replace cropyear = 2013 if year == 2014 & month <= 6
replace cropyear = 2014 if year == 2014 & month >= 7
replace cropyear = 2014 if year == 2015 & month <= 6
replace cropyear = 2015 if year == 2015 & month >= 7
** NASS Data
sort cropyear state
merge m:1 cropyear state using "Data/NASSWheatState.dta",
keep if _merge == 3
drop _merge
#delimit ;
collapse 
(mean) price spread oilcars Pdiesel MNdist tmin TotTraffic wheat
(sd) price_sd=price spread_sd=spread oilcars_sd=oilcars Pdiesel_sd=Pdiesel
MNdist_sd=MNdist tmin_sd=tmin TotTraffic_sd=TotTraffic wheat_sd=wheat
(min) price_min=price spread_min=spread oilcars_min=oilcars Pdiesel_min=Pdiesel
MNdist_min=MNdist tmin_min=tmin TotTraffic_min=TotTraffic wheat_min=wheat
(max) price_max=price spread_pmax=spread oilcars_pmax=oilcars Pdiesel_pmax=Pdiesel
MNdist_max=MNdist tmin_max=tmin TotTraffic_max=TotTraffic wheat_max=wheat;
#delimit cr
xpose, clear varname
order _varname
outsheet using  "Tables/SumStatsAllVars.csv", delimiter(,) replace

***** Table 2 - Main spread results for wheat, corn and soy
*** Spread regressions 
** Wheat
use "Data/MktMNSpreadMonthlyAllSilos.dta", clear
*** Create variable for crop years
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 6
replace cropyear = 2010 if year == 2010 & month >= 7
replace cropyear = 2010 if year == 2011 & month <= 6
replace cropyear = 2011 if year == 2011 & month >= 7
replace cropyear = 2011 if year == 2012 & month <= 6
replace cropyear = 2012 if year == 2012 & month >= 7
replace cropyear = 2012 if year == 2013 & month <= 6
replace cropyear = 2013 if year == 2013 & month >= 7
replace cropyear = 2013 if year == 2014 & month <= 6
replace cropyear = 2014 if year == 2014 & month >= 7
replace cropyear = 2014 if year == 2015 & month <= 6
replace cropyear = 2015 if year == 2015 & month >= 7
** NASS Data
sort cropyear state
merge m:1 cropyear state using "Data/NASSWheatState.dta",
keep if _merge == 3
drop _merge
* Keep ND, SD, MT, MN but drop OR since it's an export terminal
drop if state == "OR"
tab state
format date %tm
sort date
merge m:1 date using "Data/PRR_Rail_Daily_Hughes_monthly.dta", 
* Keep years for which we have Genscape and Geograin data
drop if year < 2012
drop if year > 2015
* Check everything matched in merge
tab _merge
drop _merge
order marketid year month price bid_h bid_l AvgMNspot spread num_cars
* Merge in Diesel price
sort date
merge m:1 date using "Data/Diesel_clean.dta", 
keep if _merge == 3
drop _merge
* Convert cars to 1000s
rename num_cars oilcars
replace oilcars = oilcars/1000
* Convert distance to miles
replace MNdist = MNdist/100
* Cars by distance interaction
gen carsXdistance = oilcars*MNdist
* Production in million bu.
replace wheat = wheat/10^6
replace TotTraffic = TotTraffic/1000
** Make constant so that we can absorb and use regdhfe
gen byte constant = 1
* Make diesel price X distance interaction variable
gen PdDist = Pdiesel*MNdist
** Models 
* Distance
reghdfe spread oilcars PdDist, vce(cluster marketid date) absorb(constant) 
eststo wheat1
* Market/silo fixed effects
reghdfe spread oilcars PdDist, vce(cluster marketid date) absorb(i.marketid) 
eststo wheat2
* Add seasonal effects
reghdfe spread oilcars PdDist, vce(cluster marketid date) absorb(i.marketid i.month) 
eststo wheat3
* Add production
reghdfe spread oilcars PdDist, vce(cluster marketid date) absorb(i.marketid i.month#c.wheat) 
eststo wheat4
* Add tmin
reghdfe spread oilcars PdDist tmin, vce(cluster marketid date) absorb(i.marketid i.month#c.wheat) 
eststo wheat5
* Add Total non oil and grain traffic
reghdfe spread oilcars PdDist tmin TotTraffic, vce(cluster marketid date) absorb(i.marketid i.month#c.wheat) 
eststo wheat6
* Change from dropping 2015 date
reghdfe spread oilcars PdDist tmin if year < 2015, vce(cluster marketid date) absorb(i.marketid i.month#c.wheat) 
** Make table 
#delimit;
estout wheat1 wheat2 wheat3 wheat4 wheat5 wheat6
using "Tables/WheatSpread.txt", order(oilcars PdDist) 
cells(b(star fmt(%9.3f)) se(par fmt(%9.3f)) ) style(tab)  stats(N p r2_a, fmt(%8.0f %9.3f)) 
starlevels(* 0.1 ** 0.05 *** 0.01) varlabels(_cons Constant) replace drop();
#delimit cr
** Corn
use "Data/MktCornSpreadMonthlyAllSilos.dta", clear
*** Create variable for crop years
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 6
replace cropyear = 2010 if year == 2010 & month >= 7
replace cropyear = 2010 if year == 2011 & month <= 6
replace cropyear = 2011 if year == 2011 & month >= 7
replace cropyear = 2011 if year == 2012 & month <= 6
replace cropyear = 2012 if year == 2012 & month >= 7
replace cropyear = 2012 if year == 2013 & month <= 6
replace cropyear = 2013 if year == 2013 & month >= 7
replace cropyear = 2013 if year == 2014 & month <= 6
replace cropyear = 2014 if year == 2014 & month >= 7
replace cropyear = 2014 if year == 2015 & month <= 6
replace cropyear = 2015 if year == 2015 & month >= 7
** NASS Data
sort cropyear state
merge m:1 cropyear state using "Data/NASSCornState.dta",
keep if _merge == 3
drop _merge
* Keep ND, SD, MT, MN, NE, IA but drop OR since it's an export terminal
drop if state == "OR"
tab state
format date %tm
sort date
merge m:1 date using "Data/PRR_Rail_Daily_Hughes_monthly.dta", 
* Keep years for which we have Genscape and Geograin data
drop if year < 2012
drop if year > 2015
* Check everything matched in merge
tab _merge
drop _merge
order marketid year month price bid_h bid_l AvgCornSpot spread num_cars
* Merge in Diesel price
sort date
merge m:1 date using "Data/Diesel_clean.dta", 
keep if _merge == 3
drop _merge
* Convert cars to 1000s
rename num_cars oilcars
replace oilcars = oilcars/1000
* Convert distance to miles
replace CHIdist = CHIdist/100
* Cars by distance interaction
gen carsXdistance = oilcars*CHIdist
* Production in million bu.
replace corn = corn/10^6
replace TotTraffic = TotTraffic/1000
** Make constant so that we can absorb and use regdhfe
gen byte constant = 1
* Make diesel price X distance interaction variable
gen PdDist = Pdiesel*CHIdist
** Models 
* Distance
reghdfe spread oilcars PdDist, vce(cluster marketid date) absorb(constant) 
eststo corn1
* Market/silo fixed effects
reghdfe spread oilcars PdDist, vce(cluster marketid date) absorb(i.marketid) 
eststo corn2
* Add seasonal effects
reghdfe spread oilcars PdDist, vce(cluster marketid date) absorb(i.marketid i.month) 
eststo corn3
* Add production
reghdfe spread oilcars PdDist, vce(cluster marketid date) absorb(i.marketid i.month#c.corn)
eststo corn4
* Add tmin
reghdfe spread oilcars PdDist tmin, vce(cluster marketid date) absorb(i.marketid i.month#c.corn)
eststo corn5
* Add Total non oil and grain traffic
reghdfe spread oilcars PdDist tmin TotTraffic, vce(cluster marketid date) absorb(i.marketid i.month#c.corn)
eststo corn6
** Make table 
#delimit;
estout corn1 corn2 corn3 corn4 corn5 corn6
using "Tables/CornSpread.txt", order(oilcars PdDist)
cells(b(star fmt(%9.3f)) se(par fmt(%9.3f)) ) style(tab)  stats(N p r2_a, fmt(%8.0f %9.3f)) 
starlevels(* 0.1 ** 0.05 *** 0.01) varlabels(_cons Constant) replace drop();
#delimit cr
** Soy
use "Data/MktSoySpreadMonthlyAllSilos.dta", clear
*** Create variable for crop years
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 6
replace cropyear = 2010 if year == 2010 & month >= 7
replace cropyear = 2010 if year == 2011 & month <= 6
replace cropyear = 2011 if year == 2011 & month >= 7
replace cropyear = 2011 if year == 2012 & month <= 6
replace cropyear = 2012 if year == 2012 & month >= 7
replace cropyear = 2012 if year == 2013 & month <= 6
replace cropyear = 2013 if year == 2013 & month >= 7
replace cropyear = 2013 if year == 2014 & month <= 6
replace cropyear = 2014 if year == 2014 & month >= 7
replace cropyear = 2014 if year == 2015 & month <= 6
replace cropyear = 2015 if year == 2015 & month >= 7
** NASS Data
sort cropyear state
merge m:1 cropyear state using "Data/NASSSoyState.dta",
keep if _merge == 3
drop _merge
* Keep ND, SD, MT, MN, NE, IA but drop OR since it's an export terminal
drop if state == "OR"
tab state
*gen date = ym(year, month)
format date %tm
sort date
merge m:1 date using "Data/PRR_Rail_Daily_Hughes_monthly.dta", 
* Keep years for which we have Genscape and Geograin data
drop if year < 2012
drop if year > 2015
* Check everything matched in merge
tab _merge
drop _merge
order marketid year month price bid_h bid_l AvgSoySpot spread num_cars
* Merge in Diesel price
sort date
merge m:1 date using "Data/Diesel_clean.dta", 
keep if _merge == 3
drop _merge
* Convert cars to 1000s
rename num_cars oilcars
replace oilcars = oilcars/1000
* Convert distance to miles
replace CHIdist = CHIdist/100
* Cars by distance interaction
gen carsXdistance = oilcars*CHIdist
* Production in million bu.
replace soy = soy/10^6
replace TotTraffic = TotTraffic/1000
** Make constant so that we can absorb and use regdhfe
gen byte constant = 1
* Make diesel price X distance interaction variable
gen PdDist = Pdiesel*CHIdist
** Models 
* Distance
reghdfe spread oilcars PdDist, vce(cluster marketid date) absorb(constant) 
eststo soy1
* Market/silo fixed effects
reghdfe spread oilcars PdDist, vce(cluster marketid date) absorb(i.marketid) 
eststo soy2
* Add seasonal effects
reghdfe spread oilcars PdDist, vce(cluster marketid date) absorb(i.marketid i.month) 
eststo soy3
* Add production
reghdfe spread oilcars PdDist, vce(cluster marketid date) absorb(i.marketid i.month#c.soy) 
eststo soy4
* Add tmin
reghdfe spread oilcars PdDist tmin, vce(cluster marketid date) absorb(i.marketid i.month#c.soy) 
eststo soy5
* Add Total non oil and grain traffic
reghdfe spread oilcars PdDist tmin TotTraffic, vce(cluster marketid date) absorb(i.marketid i.month#c.soy) 
eststo soy6
** Make table 
#delimit;
estout soy1 soy2 soy3 soy4 soy5 soy6
using "Tables/SoySpread.txt", order(oilcars PdDist)
cells(b(star fmt(%9.3f)) se(par fmt(%9.3f)) ) style(tab)  stats(N p r2_a, fmt(%8.0f %9.3f)) 
starlevels(* 0.1 ** 0.05 *** 0.01) varlabels(_cons Constant) replace drop();
#delimit cr

***** Table 3 - Check using different spreads for east and west shipments
** Create Portland Spread
* Reshape futures data
use "/Users/Jon/Dropbox/Rail and Oil/Data/quandl/MWfut_JH.dta", clear
reshape long cont p, i(date) j(Contract) string
drop if  p ==.
drop if cont == .
rename cont FutMonth
rename p FutPrice
replace FutPrice = FutPrice/100
sort date FutMonth
save "/Users/Jon/Dropbox/Rail and Oil/Data/quandl/MWfut_clean.dta", replace
* Open Portland Data
insheet using "/Users/Jon/Dropbox/Rail and Oil/Data/PortlandWheat.csv", clear
keep if class=="DARK NORTHERN SPRING"
rename futuresmonthyearlow FutMonth
drop futuresmonthyearhigh
rename v10 FutExch
drop if low==.&high==.
split variety, p(%)
gen Variety=floor(real(variety1))
drop variety1
* Look only at 14% protein contracts
keep if Variety == 14
* Reshape to wide (Only need this if we have multiple protein contents
reshape wide low high , i(reportdate location class gradedescription units transmode FutExch FutMonth pricingpoint deliveryperiod ) j(Variety) 
gen spread14=(low14+high14)/2
* Convert to $/bu.
replace spread14 = spread14/100 
* Format date variable
split reportdate, p(/)
gen month = real(reportdate1)
gen day = real(reportdate2)
gen year = real(reportdate3)+2000
drop reportdate*
gen reportdate = mdy(month, day, year)
gen date = reportdate
format date %td
save "/Users/Jon/Dropbox/Rail and Oil/Data/PNWWheat.dta", replace
* Merge in futures data
split FutMonth, p(-)
gen fumonth = .
replace fumonth = 12 if FutMonth1 == "Dec"
replace fumonth = 7 if FutMonth1 == "Jul"
replace fumonth = 3 if FutMonth1 == "Mar"
replace fumonth = 5 if FutMonth1 == "May"
replace fumonth = 9 if FutMonth1 == "Sep"
gen fuyear = real(FutMonth2) + 2000
drop FutMonth
gen FutMonth = mdy(fumonth, 1, fuyear)
format FutMonth %td
sort date FutMonth
keep date FutMonth spread14
merge m:1 date FutMonth using "/Users/Jon/Dropbox/Rail and Oil/Data/quandl/MWfut_clean.dta", 
keep if _merge == 3
drop _merge
gen PortPrice = spread14 + FutPrice
* Collapse to monthly
gen month = month(date)
gen year = year(date)
sort year month
collapse (mean) spread14 FutPrice PortPrice, by(year month)
gen date = ym(year, month)
format date %tm
save "/Users/Jon/Dropbox/Rail and Oil/Data/quandl/PortlandPrices.dta", replace
** Wheat
use "Data/MktMNSpreadMonthlyAllSilos.dta", clear
sort date
merge m:1 date using "/Users/Jon/Dropbox/Rail and Oil/Data/quandl/PortlandPrices.dta", 
keep if _merge == 3
drop _merge
* Make PNW spread variable
gen PNWspread = PortPrice-price
*** Create variable for crop years
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 6
replace cropyear = 2010 if year == 2010 & month >= 7
replace cropyear = 2010 if year == 2011 & month <= 6
replace cropyear = 2011 if year == 2011 & month >= 7
replace cropyear = 2011 if year == 2012 & month <= 6
replace cropyear = 2012 if year == 2012 & month >= 7
replace cropyear = 2012 if year == 2013 & month <= 6
replace cropyear = 2013 if year == 2013 & month >= 7
replace cropyear = 2013 if year == 2014 & month <= 6
replace cropyear = 2014 if year == 2014 & month >= 7
replace cropyear = 2014 if year == 2015 & month <= 6
replace cropyear = 2015 if year == 2015 & month >= 7
** NASS Data
sort cropyear state
merge m:1 cropyear state using "Data/NASSWheatState.dta",
keep if _merge == 3
drop _merge
* Keep ND, SD, MT, MN but drop OR since it's likely an export terminal
drop if state == "OR"
tab state
*gen date = ym(year, month)
format date %tm
sort date
merge m:1 date using "Data/PRR_Rail_Daily_Hughes_monthly.dta", 
* Keep years for which we have Genscape and Geograin data
drop if year < 2012
drop if year > 2015
* Check everything matched in merge
tab _merge
drop _merge
order marketid year month price bid_h bid_l AvgMNspot spread num_cars
* Merge in Diesel price
sort date
merge m:1 date using "Data/Diesel_clean.dta", 
keep if _merge == 3
drop _merge
* Convert cars to 1000s
rename num_cars oilcars
replace oilcars = oilcars/1000
* Generate PNW distance variable
geodist latitude longitude 45.5122 -122.6587, gen(PNWdist) miles
* Production in million bu.
replace wheat = wheat/10^6
replace TotTraffic = TotTraffic/1000
** Make constant so that we can absorb and use regdhfe
gen byte constant = 1
** Make deciles of min temperature
xtile tempbin = tmin, nq(10)
tabstat tmin, by(tempbin)
** Split spreads by geography
replace spread = PNWspread if PNWdist < MNdist
* Distance variable
gen dist = MNdist
replace dist = PNWdist if PNWdist < MNdist
* Convert distance to 100 miles
replace dist = dist/100
* Diesel price distance interaction
gen PdDist = Pdiesel*dist
** Models 
* Distance
reghdfe spread oilcars PdDist, vce(cluster marketid date) absorb(constant) 
eststo wheat1PNW
* Market/silo fixed effects
reghdfe spread oilcars PdDist, vce(cluster marketid date) absorb(i.marketid) 
eststo wheat2PNW
* Add seasonal effects
reghdfe spread oilcars PdDist, vce(cluster marketid date) absorb(i.marketid i.month) 
eststo wheat3PNW
* Add production
reghdfe spread oilcars PdDist, vce(cluster marketid date) absorb(i.marketid i.month#c.wheat) 
eststo wheat4PNW
* Add tmin
reghdfe spread oilcars PdDist tmin, vce(cluster marketid date) absorb(i.marketid i.month#c.wheat) 
eststo wheat5PNW
* Add Total non oil and grain traffic
reghdfe spread oilcars PdDist tmin TotTraffic, vce(cluster marketid date) absorb(i.marketid i.month#c.wheat) 
eststo wheat6PNW
** Make table 
#delimit;
estout wheat1PNW wheat2PNW wheat3PNW wheat4PNW wheat5PNW wheat6PNW
using "Tables/WheatSpreadPNW.txt", order(oilcars PdDist) 
cells(b(star fmt(%9.3f)) se(par fmt(%9.3f)) ) style(tab)  stats(N p r2_a, fmt(%8.0f %9.3f)) 
starlevels(* 0.1 ** 0.05 *** 0.01) varlabels(_cons Constant) replace drop();
#delimit cr

***** Table 4 - time-series models *****
use "$path/Data/MN_Spot.dta" 
drop reportdate
replace variety=substr(variety,1,2)
destring variety, replace
replace variety=variety/100
rename bid_l temp
rename bid_h bid_l
rename temp bid_h

save "$path/Data/tempMN.dta", replace

forvalues yr=1998(1)2009 {
	local ii=`yr'
	local jj=2009-`ii'
	import excel "$path/Data/MN`ii'.xlsx", sheet("report (`jj')") firstrow case(lower)  clear
		keep if transmode=="Rail"
		rename bidlevel bid_l
		destring bid_l, replace
		rename i bid_h
		destring bid_h, replace
	gen date=reportdate
	keep date bid_l bid_h  location class variety units transmode pricingpoint deliveryperiod
	order date bid_l bid_h  location class variety units transmode pricingpoint deliveryperiod
	append using "$path/Data/tempMN.dta"
	save "$path/Data/tempMN.dta", replace
}
replace variety=round(100*variety)/100

*** average over protein levels
collapse (mean) bid_l bid_h variety (first) location class units transmode pricingpoint deliveryperiod, by(date)
format variety %9.0g
format date %td

*** Create variable for crop years
gen cropyear = 0
forvalues yr=1998(1)2016 {
	replace cropyear = `yr' if year(date) == `yr' & month(date) > 6
	replace cropyear = `yr' if year(date) == `yr'+1 & month(date) <= 6
}
	
sort cropyear
save "$path/Data/tempMN.dta", replace
import excel "$path/Data/Wheat_Prod.xlsx", sheet("sheet1") firstrow case(lower)  clear

merge 1:m cropyear using "$path/Data/tempMN.dta"
keep if _merge == 3
drop _merge

sort date
save "$path/Data/tempMN.dta", replace

use "$path/Data/GeograinPrices.dta", clear
	keep if commodity=="HRS Wheat"
	keep if delyear==0
	drop if price==.

merge m:1 marketid using "$path/Data/GeograinMarkets.dta"
	keep if _merge == 3
	drop _merge
	sort date

keep if state=="ND"

collapse (mean) price (count) marketid, by(date)

		quietly merge 1:1 date using "$path/Data/tempMN.dta"

		gen MNprice=100*(bid_l+bid_h)/2
		quietly save "$path/Data/time_series_Wheat.dta", replace


		use "$path/Data/PRR_Rail_Daily_Hughes_weekly.dta", clear		
		gen yrag=100*year+week
		save "$path/Data/temp.dta", replace

		use "$path/Data/Diesel_prices.dta", clear		
		rename Date date
		gen Pdiesel=300*WeeklyMidwestNo2DieselRetai 
		gen year=year(date)
		gen week=week(date)
		  gen yrag=100*year+week
		  collapse (mean) Pdiesel, by(yrag)
		merge 1:1 yrag using "$path/Data/temp.dta"
		  keep if _merge==3
  		  drop _merge
		save "$path/Data/temp.dta", replace

		use "$path/Data/time_series_Wheat.dta", clear
		
		gen year=year(date)
		gen month=month(date)
		gen week=week(date)
		  gen yrag=100*year+week
		  drop if MNprice==.
		  drop if price==.
		  collapse (last) price MNprice date (mean) cropyear nd_hrs us_hrs year month week marketid , by(yrag)

		merge 1:1 yrag using "$path/Data/temp.dta"

		drop if MNprice==.

		format date %td

		sort date
		gen tt=_n
		tsset tt

		// change units
		replace num_cars=num_cars/1000
		replace MNprice = MNprice/100
		replace price = price/100


		gen spread=MNprice-price
		gen rel_prod=nd_hrs/us_hrs
		
		save "$path/Data/temp_Wheat.dta", replace

		constraint define 1 [_ce1]num_cars=1
		constraint define 2 [_ce1]price=-MNprice
		constraint define 3 [_ce1]MNprice=1
		constraint define 4 [D_num_cars ]L._ce1=0

		log using "$path/Tables/time_series_wheat_coint.log", replace

		cd G:/temp
		// restricted models 
		vec  num_cars MNprice price  if year>2011&year<2016, lags(2) trend(rc) bconstraints(1/2) iter(1000)
			irf create vec, set(vecirfs2, replace) step(100)
			irf table oirf, i(num_cars)
		vec  num_cars MNprice price  if year>2011&year<2016, lags(1) trend(rc) bconstraints(1/2) iter(1000)
			irf create vec, set(vecirfs1, replace) step(100)
			irf table oirf, i(num_cars)

		// restricted models with different ordering to get beta coefficient. LLF should be same as above
		vec  MNprice price num_cars  if year>2011&year<2016, lags(2) trend(rc) bconstraints(2/3) iter(1000)
			predict resMN, resid eq(D_MNprice)
			predict resND, resid eq(D_price)
		vec  MNprice price num_cars  if tt>=685&tt<=888, lags(1) trend(rc) bconstraints(2/3) iter(1000)
		vec  MNprice price num_cars  if tt>=685&tt<=888, lags(2) trend(rc) bconstraints(2/3) iter(1000)
		vec  MNprice price num_cars  if tt>=685&tt<=888, lags(3) trend(rc) bconstraints(2/3) iter(1000)
		vec  MNprice price num_cars  if tt>=685&tt<=888, lags(4) trend(rc) bconstraints(2/3) iter(1000)
		vec  MNprice price num_cars  if tt>=685&tt<=888, lags(5) trend(rc) bconstraints(2/3) iter(1000)
		vec  MNprice price num_cars  if tt>=685&tt<=888, lags(6) trend(rc) bconstraints(2/3) iter(1000)

		// cointegration model for Carter-Smith approach
		vec MNprice price if date>=d(01oct2009)&date<=d(02dec2013), lags(2)  bconstraints(2/3) trend(rc)
			predict resMNcs, resid eq(D_MNprice)
			predict resNDcs, resid eq(D_price)
		vec MNprice price if date>=d(01oct2009)&date<=d(02dec2013), lags(1)  bconstraints(2/3) trend(rc)

		corr resMN resND if date>=d(01oct2009)&date<=d(02dec2013)

		log close

save "$path/Data/Wheat_TS.dta", replace
saveold "$path/Data/Wheat_TS_v12.dta", replace version(12)

gen dd=10000*year+100*month+day(date)
drop date
rename dd date

export delimited date price MNprice using "E:\Dropbox\Research\Rail\Rail and Oil\Data\wheat_CS.csv", replace


***** Table 5 - Waybill models for wheat rail rates *****
**** Analysis of waybill data
*** Back of the envelop calculations on bushels per car
use "Data/CrudeOilGrain.dta", clear
keep if O_ST == "ND"
keep if STCC5 == 1137
gen bpc = tons/U_Cars*33
sum bpc
*** Basic regressions
use "Data/WheatinCropTime.dta", clear
* Total traffic in thousand carloads
replace TotTraffic = TotTraffic/1000
egen state_id = group(O_ST)
*** Make the state interactions by hand so reghdfe reports coeff. rel. to MN
gen MN = O_ST == "MN"
gen MT = O_ST == "MT"
gen ND = O_ST == "ND"
gen NE = O_ST == "NE"
gen SD = O_ST == "SD"
gen MNoilcars = MN*oilcars_stb
gen MToilcars = MT*oilcars_stb
gen NDoilcars = ND*oilcars_stb
gen NEoilcars = NE*oilcars_stb
gen SDoilcars = SD*oilcars_stb
gen MNloilcars = MN*loilcars
gen MTloilcars = MT*loilcars
gen NDloilcars = ND*loilcars
gen NEloilcars = NE*loilcars
gen SDloilcars = SD*loilcars
** Make deciles of min temperature
xtile tempbin = tmin, nq(10)
tabstat tmin, by(tempbin)
preserve
* Summary statistics for waybill data regressions
#delimit ;
collapse 
(mean) U_Cars rpt oilcars_stb Pdiesel tmin TotTraffic production
(sd)  U_Cars_sd=U_Cars rpt_sd=rpt oilcars_stb_sd=oilcars_stb Pdiesel_sd=Pdiesel tmin_sd=tmin TotTraffic_sd=TotTraffic production_sd=production
(min) U_Cars_min=U_Cars rpt_min=rpt oilcars_stb_min=oilcars_stb Pdiesel_min=Pdiesel tmin_min=tmin TotTraffic_min=TotTraffic production_min=production
(max) U_Cars_max=U_Cars rpt_max=rpt oilcars_stb_max=oilcars_stb Pdiesel_max=Pdiesel tmin_max=tmin TotTraffic_max=TotTraffic production_max=production;
#delimit cr
xpose, clear varname
order _varname
outsheet using  "Tables/SumStatsWaybill.csv", delimiter(,) replace
** Revenue Regressions (RPT is really revenue per bushel)
restore
* Base model
reghdfe rpt oilcars_stb Pdiesel, vce(cluster O_FIPS cropdate) absorb(i.m i.O_FIPS)
eststo wheatrev1
* Add crop production (county-level)
reghdfe rpt oilcars_stb Pdiesel, vce(cluster O_FIPS cropdate) absorb(i.m#c.production i.O_FIPS)
eststo wheatrev2
* Add temperature
reghdfe rpt oilcars_stb Pdiesel tmin, vce(cluster O_FIPS cropdate) absorb(i.m#c.production i.O_FIPS)
eststo wheatrev3
* Add total non-grain and oil traffic
reghdfe rpt oilcars_stb Pdiesel tmin TotTraff, vce(cluster O_FIPS cropdate) absorb(i.m#c.production i.O_FIPS)
eststo wheatrev4
* Look at differences across states
reghdfe rpt oilcars_stb MToilcars NDoilcars SDoilcars Pdiesel tmin TotTraff, vce(cluster O_FIPS cropdate) absorb(i.m#c.production i.O_FIPS)
eststo wheatrev5
*** Make revenue per bushel table 
#delimit;
estout wheatrev1 wheatrev2 wheatrev3 wheatrev4 wheatrev5 
using "Tables/RPT.txt", order()
cells(b(star fmt(%9.3f)) se(par fmt(%9.3f)) ) style(tab)  stats(N p r2_a, fmt(%8.0f %9.3f))  
starlevels(* 0.1 ** 0.05 *** 0.01) varlabels(_cons Constant) replace drop();
#delimit cr

***** Table 6 - Waybill models for wheat rail carloads *****
** Cars Regressions
* Base model
reghdfe lcars loilcars lpdiesel, vce(cluster O_FIPS cropdate) absorb(i.m i.O_FIPS)
eststo wheat1
* Add crop production (county-level)
reghdfe lcars loilcars lpdiesel, vce(cluster O_FIPS cropdate) absorb(i.m#c.lproduction i.O_FIPS)
eststo wheat2
* Add temperature
reghdfe lcars loilcars lpdiesel tmin, vce(cluster O_FIPS cropdate) absorb(i.m#c.lproduction i.O_FIPS)
eststo wheat3
* Add total non-grain and oil traffic
reghdfe lcars loilcars lpdiesel tmin lTotTraff, vce(cluster O_FIPS cropdate) absorb(i.m#c.lproduction i.O_FIPS)
eststo wheat4
* Look at differences across states
xi: cluster2 lcars i.O_ST|loilcars lpdiesel lTotTraff tmin i.m|lproduction i.O_FIPS, fcluster(O_FIPS) tcluster(cropdate)
reghdfe lcars loilcars MTloilcars NDloilcars SDloilcars lpdiesel tmin lTotTraff, vce(cluster O_FIPS cropdate) absorb(i.m#c.lproduction i.O_FIPS)
eststo wheat5
*** Regression Table
*** Make car table 
#delimit;
estout wheat1 wheat2 wheat3 wheat4 wheat5 
using "Tables/CARS.txt", order()
cells(b(star fmt(%9.3f)) se(par fmt(%9.3f)) ) style(tab)  stats(N p r2_a, fmt(%8.0f %9.3f))  
starlevels(* 0.1 ** 0.05 *** 0.01) varlabels(_cons Constant) replace drop();
#delimit cr

***** Table 7 - Waybill models for corn and soy rail rates and carloads *****
**** Corn - Analysis of waybill data 
*** Basic regressions
use "Data/CorninCropTime.dta", clear
* Total traffic in thousand carloads
replace TotTraffic = TotTraffic/1000
egen state_id = group(O_ST)
*** Make the state interactions by hand so reghdfe reports coeff. rel. to MN
gen MN = O_ST == "MN"
gen MT = O_ST == "MT"
gen ND = O_ST == "ND"
gen NE = O_ST == "NE"
gen SD = O_ST == "SD"
gen MNoilcars = MN*oilcars_stb
gen MToilcars = MT*oilcars_stb
gen NDoilcars = ND*oilcars_stb
gen NEoilcars = NE*oilcars_stb
gen SDoilcars = SD*oilcars_stb
gen MNloilcars = MN*loilcars
gen MTloilcars = MT*loilcars
gen NDloilcars = ND*loilcars
gen NEloilcars = NE*loilcars
gen SDloilcars = SD*loilcars
** Cars Regressions
* Base model
reghdfe lcars loilcars lpdiesel, vce(cluster O_FIPS cropdate) absorb(i.m i.O_FIPS)
eststo corn1
* Add crop production (county-level)
reghdfe lcars loilcars lpdiesel, vce(cluster O_FIPS cropdate) absorb(i.m#c.lproduction i.O_FIPS)
eststo corn2
* Add temperature
reghdfe lcars loilcars lpdiesel tmin, vce(cluster O_FIPS cropdate) absorb(i.m#c.lproduction i.O_FIPS)
eststo corn3
* Add total non-grain and oil traffic
reghdfe lcars loilcars lpdiesel tmin lTotTraff, vce(cluster O_FIPS cropdate) absorb(i.m#c.lproduction i.O_FIPS)
eststo corn4
* Look at differences across states
xi: cluster2 lcars i.O_ST|loilcars lpdiesel lTotTraff tmin i.m|lproduction i.O_FIPS, fcluster(O_FIPS) tcluster(cropdate)
reghdfe lcars loilcars MNloilcars MTloilcars NDloilcars NEloilcars SDloilcars lpdiesel tmin lTotTraff, vce(cluster O_FIPS cropdate) absorb(i.m#c.lproduction i.O_FIPS)
eststo corn5
** Revenue Regressions (RPT is really revenue per bushel)
* Base model
reghdfe rpt oilcars_stb Pdiesel, vce(cluster O_FIPS cropdate) absorb(i.m i.O_FIPS)
eststo cornrev1
* Add crop production (county-level)
reghdfe rpt oilcars_stb Pdiesel, vce(cluster O_FIPS cropdate) absorb(i.m#c.production i.O_FIPS)
eststo cornrev2
* Add temperature
xi: cluster2 rpt oilcars_stb Pdiesel i.m|production tmin i.O_FIPS, fcluster(O_FIPS) tcluster(cropdate)
reghdfe rpt oilcars_stb Pdiesel tmin, vce(cluster O_FIPS cropdate) absorb(i.m#c.production i.O_FIPS)
eststo cornrev3
* Add total non-grain and oil traffic
reghdfe rpt oilcars_stb Pdiesel tmin TotTraff, vce(cluster O_FIPS cropdate) absorb(i.m#c.production i.O_FIPS)
eststo cornrev4
* Look at differences across states
xi: cluster2 rpt i.O_ST|oilcars_stb Pdiesel TotTraff tmin i.m|production i.O_FIPS, fcluster(O_FIPS) tcluster(cropdate)
reghdfe rpt oilcars_stb MNoilcars MToilcars NDoilcars NEoilcars SDoilcars Pdiesel tmin TotTraff, vce(cluster O_FIPS cropdate) absorb(i.m#c.production i.O_FIPS)
eststo cornrev5
*** Regression Table
*** Make car table 
#delimit;
estout corn1 corn2 corn3 corn4 corn5 
using "Tables/cornCARS.txt", order()
cells(b(star fmt(%9.3f)) se(par fmt(%9.3f)) ) style(tab)  stats(N p r2_a, fmt(%8.0f %9.3f))  
starlevels(* 0.1 ** 0.05 *** 0.01) varlabels(_cons Constant) replace drop();
#delimit cr
*** Make revenue per bushel table 
#delimit;
estout cornrev1 cornrev2 cornrev3 cornrev4 cornrev5 
using "Tables/cornRPT.txt", order()
cells(b(star fmt(%9.3f)) se(par fmt(%9.3f)) ) style(tab)  stats(N p r2_a, fmt(%8.0f %9.3f))  
starlevels(* 0.1 ** 0.05 *** 0.01) varlabels(_cons Constant) replace drop();
#delimit cr
**** Soy - Analysis of waybill data
*** Basic regressions
use "Data/SoyinCropTime.dta", clear
drop if O_ST == "MT"
* Total traffic in thousand carloads
replace TotTraffic = TotTraffic/1000
egen state_id = group(O_ST)
*** Make the state interactions by hand so reghdfe reports coeff. rel. to MN
gen MN = O_ST == "MN"
gen MT = O_ST == "MT"
gen ND = O_ST == "ND"
gen NE = O_ST == "NE"
gen SD = O_ST == "SD"
gen MNoilcars = MN*oilcars_stb
gen MToilcars = MT*oilcars_stb
gen NDoilcars = ND*oilcars_stb
gen NEoilcars = NE*oilcars_stb
gen SDoilcars = SD*oilcars_stb
gen MNloilcars = MN*loilcars
gen MTloilcars = MT*loilcars
gen NDloilcars = ND*loilcars
gen NEloilcars = NE*loilcars
gen SDloilcars = SD*loilcars
** Cars Regressions
* Base model
reghdfe lcars loilcars lpdiesel, vce(cluster O_FIPS cropdate) absorb(i.m i.O_FIPS)
eststo soy1
* Add crop production (county-level)
reghdfe lcars loilcars lpdiesel, vce(cluster O_FIPS cropdate) absorb(i.m#c.lproduction i.O_FIPS)
eststo soy2
* Add temperature
reghdfe lcars loilcars lpdiesel tmin, vce(cluster O_FIPS cropdate) absorb(i.m#c.lproduction i.O_FIPS)
eststo soy3
* Add total non-grain and oil traffic
xi: cluster2 lcars loilcars lpdiesel tmin lTotTraff i.m|lproduction i.O_FIPS, fcluster(O_FIPS) tcluster(cropdate)
reghdfe lcars loilcars lpdiesel tmin lTotTraff, vce(cluster O_FIPS cropdate) absorb(i.m#c.lproduction i.O_FIPS)
eststo soy4
* Look at differences across states
xi: cluster2 lcars i.O_ST|loilcars lpdiesel lTotTraff tmin i.m|lproduction i.O_FIPS, fcluster(O_FIPS) tcluster(cropdate)
reghdfe lcars loilcars MNloilcars NDloilcars NEloilcars SDloilcars lpdiesel tmin lTotTraff, vce(cluster O_FIPS cropdate) absorb(i.m#c.lproduction i.O_FIPS)
eststo soy5
** Revenue Regressions (RPT is really revenue per bushel)
* Base model
reghdfe rpt oilcars_stb Pdiesel, vce(cluster O_FIPS cropdate) absorb(i.m i.O_FIPS)
eststo soyrev1
* Add crop production (county-level)
reghdfe rpt oilcars_stb Pdiesel, vce(cluster O_FIPS cropdate) absorb(i.m#c.production i.O_FIPS)
eststo soyrev2
* Add temperature
reghdfe rpt oilcars_stb Pdiesel tmin, vce(cluster O_FIPS cropdate) absorb(i.m#c.production i.O_FIPS)
eststo soyrev3
* Add total non-grain and oil traffic
reghdfe rpt oilcars_stb Pdiesel tmin TotTraff, vce(cluster O_FIPS cropdate) absorb(i.m#c.production i.O_FIPS)
eststo soyrev4
* Look at differences across states
xi: cluster2 rpt i.O_ST|oilcars_stb Pdiesel TotTraff tmin i.m|production i.O_FIPS, fcluster(O_FIPS) tcluster(cropdate)
reghdfe rpt oilcars_stb MNoilcars NDoilcars NEoilcars SDoilcars Pdiesel tmin TotTraff, vce(cluster O_FIPS cropdate) absorb(i.m#c.production i.O_FIPS)
eststo soyrev5
*** Regression Table
*** Make car table 
#delimit;
estout soy1 soy2 soy3 soy4 soy5 
using "Tables/soyCARS.txt", order()
cells(b(star fmt(%9.3f)) se(par fmt(%9.3f)) ) style(tab)  stats(N p r2_a, fmt(%8.0f %9.3f))  
starlevels(* 0.1 ** 0.05 *** 0.01) varlabels(_cons Constant) replace drop();
#delimit cr
*** Make revenue per bushel table 
#delimit;
estout soyrev1 soyrev2 soyrev3 soyrev4 soyrev5 
using "Tables/soyRPT.txt", order()
cells(b(star fmt(%9.3f)) se(par fmt(%9.3f)) ) style(tab)  stats(N p r2_a, fmt(%8.0f %9.3f))  
starlevels(* 0.1 ** 0.05 *** 0.01) varlabels(_cons Constant) replace drop();
#delimit cr

***** Table 8 - Corn and Soy Carry *****
*** Carry analysis
** Corn regressions
use "/Users/Jon/Dropbox/rail and oil/Data/Maps/CornMonthlyCarry.dta", clear
*** Create variable for crop years
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 6
replace cropyear = 2010 if year == 2010 & month >= 7
replace cropyear = 2010 if year == 2011 & month <= 6
replace cropyear = 2011 if year == 2011 & month >= 7
replace cropyear = 2011 if year == 2012 & month <= 6
replace cropyear = 2012 if year == 2012 & month >= 7
replace cropyear = 2012 if year == 2013 & month <= 6
replace cropyear = 2013 if year == 2013 & month >= 7
replace cropyear = 2013 if year == 2014 & month <= 6
replace cropyear = 2014 if year == 2014 & month >= 7
replace cropyear = 2014 if year == 2015 & month <= 6
replace cropyear = 2015 if year == 2015 & month >= 7
** NASS Data
sort cropyear state
merge m:1 cropyear state using "Data/NASSCornState.dta",
keep if _merge == 3
drop _merge
format date %tm
sort date
merge m:1 date using "Data/PRR_Rail_Daily_Hughes_monthly.dta", nogen
* Keep years for which we have Genscape and Geograin data
drop if year < 2012
drop if year > 2015
* Merge in Diesel price
sort date
merge m:1 date using "Data/Diesel_clean.dta", 
keep if _merge == 3
drop _merge
* Convert cars to 1000s
rename num_cars oilcars
replace oilcars = oilcars/1000
* Production in million bu.
replace corn = corn/10^6
replace TotTraffic = TotTraffic/10^6
** Models 
**** May want to two-way cluster on marketid and date ***
egen state_id = group(state)
* 1-Month
reghdfe carry1mo oilcars Pdiesel tmin TotTraffic, vce(cluster marketid date) absorb(i.marketid i.month i.year) 
eststo CornCarry1mo
* 3-Month
reghdfe carry3mo oilcars Pdiesel tmin TotTraffic, vce(cluster marketid date) absorb(i.marketid i.month i.year) 
eststo CornCarry3mo 
* 6-month
reghdfe carry6mo oilcars Pdiesel tmin TotTraffic, vce(cluster marketid date) absorb(i.marketid i.month i.year) 
eststo CornCarry6mo 
*** Make carry table 
#delimit;
estout CornCarry1mo CornCarry3mo CornCarry6mo 
using "Tables/CornCarry.txt", order(oilcars)
cells(b(star fmt(%9.3f)) se(par fmt(%9.3f)) ) style(tab)  stats(N p r2_a, fmt(%8.0f %9.3f)) 
starlevels(* 0.1 ** 0.05 *** 0.01) varlabels(_cons Constant) replace drop();
#delimit cr
** Soy regressions
**** Need to update state interactions so reported as rel. to omitted category ***
* Regressions
use "/Users/Jon/Dropbox/rail and oil/Data/Maps/SoyMonthlyCarry.dta", clear
*** Create variable for crop years
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 6
replace cropyear = 2010 if year == 2010 & month >= 7
replace cropyear = 2010 if year == 2011 & month <= 6
replace cropyear = 2011 if year == 2011 & month >= 7
replace cropyear = 2011 if year == 2012 & month <= 6
replace cropyear = 2012 if year == 2012 & month >= 7
replace cropyear = 2012 if year == 2013 & month <= 6
replace cropyear = 2013 if year == 2013 & month >= 7
replace cropyear = 2013 if year == 2014 & month <= 6
replace cropyear = 2014 if year == 2014 & month >= 7
replace cropyear = 2014 if year == 2015 & month <= 6
replace cropyear = 2015 if year == 2015 & month >= 7
** NASS Data
sort cropyear state
merge m:1 cropyear state using "Data/NASSSoyState.dta",
keep if _merge == 3
drop _merge
* Limit states to ND, SD and MN
*keep if state == "ND" | state == "SD" | state == "MN"
*keep if state == "ND" 
format date %tm
sort date
merge m:1 date using "Data/PRR_Rail_Daily_Hughes_monthly.dta", nogen
* Keep years for which we have Genscape and Geograin data
drop if year < 2012
drop if year > 2015
* Merge in Diesel price
sort date
merge m:1 date using "Data/Diesel_clean.dta", 
keep if _merge == 3
drop _merge
* Convert cars to 1000s
rename num_cars oilcars
replace oilcars = oilcars/1000
* Production in million bu.
replace soy = soy/10^6
replace TotTraffic = TotTraffic/10^6
** Models 
egen state_id = group(state)
* 1-Month
reghdfe carry1mo oilcars Pdiesel tmin TotTraffic, vce(cluster marketid date) absorb(i.marketid i.month i.year) 
eststo SoyCarry1mo
* 3-Month
reghdfe carry3mo oilcars Pdiesel tmin TotTraffic, vce(cluster marketid date) absorb(i.marketid i.month i.year) 
eststo SoyCarry3mo 
* 6-month
reghdfe carry6mo oilcars Pdiesel tmin TotTraffic, vce(cluster marketid date) absorb(i.marketid i.month i.year) 
eststo SoyCarry6mo 
*** Make carry table 
#delimit;
estout SoyCarry1mo SoyCarry3mo SoyCarry6mo  
using "Tables/SoyCarry.txt", order(oilcars)
cells(b(star fmt(%9.3f)) se(par fmt(%9.3f)) ) style(tab)  stats(N p r2_a, fmt(%8.0f %9.3f)) 
starlevels(* 0.1 ** 0.05 *** 0.01) varlabels(_cons Constant) replace drop();
#delimit cr

***** Appendix Table 1 - Summary of types of goods shipped from ND *****
*** Create a table of summary statistis for ND shipments/shares using waybill sample
** Composition of goods out of ND
* This version of shipment shares uses the Public Waybill Sample
use "/Waybill2010to2014_clean.dta", clear
gen keep = 0
replace keep=1 if org_name == "Bismarck, ND-MT-SD" 
replace keep=1 if org_name == "Fargo-Moorhead, ND-MN" 
replace keep=1 if org_name == "Grand Forks, ND-MN" 
replace keep=1 if org_name == "Minot, ND"
keep if keep == 1 
keep if year >= 2010 & year <= 2014
gen STCC5 = real(stcc)
sort STCC5
merge  m:1 STCC5 using "Data/STCC5_Titles.dta" 
replace STCC5_Title = "Oil" if stcc == "13111"
replace STCC5_Title = "Oil" if stcc == "49101"
replace STCC5_Title = "Oil" if stcc == "49151"
replace STCC5_Title = "Coal" if stcc == "11212"
replace STCC5_Title = "Coal" if stcc == "11221"
replace STCC5_Title = "Sugar" if stcc == "20621"
replace STCC5_Title = "Soybeans" if stcc == "1144"
drop if year == .
* Collapse to averages
sort year STCC5
collapse (sum) exp_cars (first) STCC5_Title, by(year STCC5)
by year: egen AllCars = sum(exp_cars)
gen share = exp_cars/AllCars
gsort + year - share, 
order WYEAR STCC5_Title U_Cars STCC5 AllCars share NonOil
outsheet using  "Tables/NDShipmentSharePublic.csv", delimiter(,) replace


***** Appendix Table 2 - Heterogeneity by distance and state *****
*** Regressions with differential effects by state and or distance
* Wheat 
use "Data/MktMNSpreadMonthlyAllSilos.dta", clear
*** Create variable for crop years
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 6
replace cropyear = 2010 if year == 2010 & month >= 7
replace cropyear = 2010 if year == 2011 & month <= 6
replace cropyear = 2011 if year == 2011 & month >= 7
replace cropyear = 2011 if year == 2012 & month <= 6
replace cropyear = 2012 if year == 2012 & month >= 7
replace cropyear = 2012 if year == 2013 & month <= 6
replace cropyear = 2013 if year == 2013 & month >= 7
replace cropyear = 2013 if year == 2014 & month <= 6
replace cropyear = 2014 if year == 2014 & month >= 7
replace cropyear = 2014 if year == 2015 & month <= 6
replace cropyear = 2015 if year == 2015 & month >= 7
** NASS Data
sort cropyear state
merge m:1 cropyear state using "Data/NASSWheatState.dta",
keep if _merge == 3
drop _merge
* Keep ND, SD, MN and MT but drop OR since it's an export terminal
drop if state == "OR"
*gen date = ym(year, month)
format date %tm
sort date
merge m:1 date using "Data/PRR_Rail_Daily_Hughes_monthly.dta", 
* Keep years for which we have Genscape and Geograin data
drop if year < 2012
drop if year > 2015
* Check everything matched in merge
tab _merge
drop _merge
order marketid year month price bid_h bid_l AvgMNspot spread num_cars
* Merge in Diesel price
sort date
merge m:1 date using "Data/Diesel_clean.dta", 
keep if _merge == 3
drop _merge
* Convert cars to 1000s
rename num_cars oilcars
replace oilcars = oilcars/1000
* Convert distance to miles
replace MNdist = MNdist/100
* Cars by distance interaction
gen dist_sq = MNdist^2
gen carsXdistance = oilcars*MNdist
gen carsXdistance_sq = oilcars*MNdist^2
* Production in million bu.
replace wheat = wheat/10^6
replace TotTraffic = TotTraffic/1000
** Make constant so that we can absorb and use regdhfe
gen byte constant = 1
*** Need to make the state interactions by hand so reghdfe reports coeff. rel. to MN
gen MT = state == "MT"
gen ND = state == "ND"
gen SD = state == "SD"
gen NDoilcars = ND*oilcars
gen MToilcars = MT*oilcars
gen SDoilcars = SD*oilcars
* Generate variable for fuel price distance interaction
gen PdDist = Pdiesel*MNdist
** Models 
* Look at effects of distance
reghdfe spread oilcars MNdist dist_sq Pdiesel tmin TotTraffic i.month#c.wheat, vce(cluster marketid date) absorb(constant)
eststo WheatDist1
* State intereactions
egen state_id = group(state)
reghdfe spread oilcars MToilcars NDoilcars SDoilcars PdDist tmin TotTraffic, vce(cluster marketid date) absorb(i.marketid i.month#c.wheat) 
eststo WheatDist3
** Make table 
#delimit;
estout WheatDist1 WheatDist3 
using "Tables/WheatDistSpread.txt", order(oilcars MNdist dist_sq Pdiesel PdDist _IstaXoilca_2 _IstaXoilca_3 _IstaXoilca_4)
cells(b(star fmt(%9.3f)) se(par fmt(%9.3f)) ) style(tab)  stats(N p r2_a, fmt(%8.0f %9.3f)) 
starlevels(* 0.1 ** 0.05 *** 0.01) varlabels(_cons Constant) replace drop();
#delimit cr

***** Appendix Table 3 - Heterogeneity by distance and state *****
* Regressions
use "Data/MktCornSpreadMonthlyAllSilos.dta", clear
*** Create variable for crop years
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 6
replace cropyear = 2010 if year == 2010 & month >= 7
replace cropyear = 2010 if year == 2011 & month <= 6
replace cropyear = 2011 if year == 2011 & month >= 7
replace cropyear = 2011 if year == 2012 & month <= 6
replace cropyear = 2012 if year == 2012 & month >= 7
replace cropyear = 2012 if year == 2013 & month <= 6
replace cropyear = 2013 if year == 2013 & month >= 7
replace cropyear = 2013 if year == 2014 & month <= 6
replace cropyear = 2014 if year == 2014 & month >= 7
replace cropyear = 2014 if year == 2015 & month <= 6
replace cropyear = 2015 if year == 2015 & month >= 7
** NASS Data
sort cropyear state
merge m:1 cropyear state using "Data/NASSCornState.dta",
keep if _merge == 3
drop _merge
* Keep ND, SD, MT, MN, NE, IA but drop OR since it's an export terminal
drop if state == "OR"
tab state
*gen date = ym(year, month)
format date %tm
sort date
merge m:1 date using "Data/PRR_Rail_Daily_Hughes_monthly.dta", 
* Keep years for which we have Genscape and Geograin data
drop if year < 2012
drop if year > 2015
* Check everything matched in merge
tab _merge
drop _merge
order marketid year month price bid_h bid_l AvgCornSpot spread num_cars
* Merge in Diesel price
sort date
merge m:1 date using "Data/Diesel_clean.dta", 
keep if _merge == 3
drop _merge
* Convert cars to 1000s
rename num_cars oilcars
replace oilcars = oilcars/1000
* Convert distance to miles
replace CHIdist = CHIdist/100
* Cars by distance interaction
gen dist_sq = CHIdist^2
gen carsXdistance = oilcars*CHIdist
gen carsXdistance_sq = oilcars*CHIdist^2
* Production in million bu.
replace corn = corn/10^6
replace TotTraffic = TotTraffic/1000
** Make constant so that we can absorb and use regdhfe
gen byte constant = 1
*** Need to make the state interactions by hand so reghdfe reports coeff. rel. to MN
gen MN = state == "MN"
gen MT = state == "MT"
gen ND = state == "ND"
gen NE = state == "NE"
gen SD = state == "SD"
gen MNoilcars = MN*oilcars
gen MToilcars = MT*oilcars
gen NDoilcars = ND*oilcars
gen NEoilcars = NE*oilcars
gen SDoilcars = SD*oilcars
* Generate variable for fuel price distance interaction
gen PdDist = Pdiesel*CHIdist
** Models 
* Look at effects of distance
reghdfe spread oilcars CHIdist dist_sq Pdiesel tmin TotTraffic i.month#c.corn, vce(cluster marketid date) absorb(constant)
eststo CornDist1
* State intereactions
egen state_id = group(state)
eststo CornDist2
* This one gets's the SE's right
reghdfe spread oilcars MNoilcars MToilcars NDoilcars NEoilcars SDoilcars PdDist tmin TotTraffic, vce(cluster marketid date) absorb(i.marketid i.month#c.corn) 
eststo CornDist3
** Make table 
#delimit;
estout CornDist1 CornDist3
using "Tables/CornDistSpread.txt", order(oilcars CHIdist dist_sq carsXdistance PdDist _IstaXoilca_2 _IstaXoilca_3 _IstaXoilca_4 _IstaXoilca_5 _IstaXoilca_6)
cells(b(star fmt(%9.3f)) se(par fmt(%9.3f)) ) style(tab)  stats(N p r2_a, fmt(%8.0f %9.3f)) 
starlevels(* 0.1 ** 0.05 *** 0.01) varlabels(_cons Constant) replace drop();
#delimit cr
* Soy - Asides on distance effects
* Regressions
use "Data/MktSoySpreadMonthlyAllSilos.dta", clear
drop if O_ST == "MT"
*** Create variable for crop years
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 6
replace cropyear = 2010 if year == 2010 & month >= 7
replace cropyear = 2010 if year == 2011 & month <= 6
replace cropyear = 2011 if year == 2011 & month >= 7
replace cropyear = 2011 if year == 2012 & month <= 6
replace cropyear = 2012 if year == 2012 & month >= 7
replace cropyear = 2012 if year == 2013 & month <= 6
replace cropyear = 2013 if year == 2013 & month >= 7
replace cropyear = 2013 if year == 2014 & month <= 6
replace cropyear = 2014 if year == 2014 & month >= 7
replace cropyear = 2014 if year == 2015 & month <= 6
replace cropyear = 2015 if year == 2015 & month >= 7
** NASS Data
sort cropyear state
merge m:1 cropyear state using "Data/NASSSoyState.dta",
keep if _merge == 3
drop _merge
* Keep ND, SD, MT, MN, NE, IA but drop OR since it's likely an export terminal
drop if state == "OR"
tab state
*gen date = ym(year, month)
format date %tm
sort date
merge m:1 date using "Data/PRR_Rail_Daily_Hughes_monthly.dta", 
* Keep years for which we have Genscape and Geograin data
drop if year < 2012
drop if year > 2015
* Check everything matched in merge
tab _merge
drop _merge
order marketid year month price bid_h bid_l AvgSoySpot spread num_cars
* Merge in Diesel price
sort date
merge m:1 date using "Data/Diesel_clean.dta", 
keep if _merge == 3
drop _merge
* Convert cars to 1000s
rename num_cars oilcars
replace oilcars = oilcars/1000
* Convert distance to miles
replace CHIdist = CHIdist/100
* Cars by distance interaction
gen dist_sq = CHIdist^2
gen carsXdistance = oilcars*CHIdist
gen carsXdistance_sq = oilcars*CHIdist^2
* Production in million bu.
replace soy = soy/10^6
replace TotTraffic = TotTraffic/1000
** Make constant so that we can absorb and use regdhfe
gen byte constant = 1
*** Need to make the state interactions by hand so reghdfe reports coeff. rel. to MN
gen MN = state == "MN"
gen MT = state == "MT"
gen ND = state == "ND"
gen NE = state == "NE"
gen SD = state == "SD"
gen MNoilcars = MN*oilcars
gen MToilcars = MT*oilcars
gen NDoilcars = ND*oilcars
gen NEoilcars = NE*oilcars
gen SDoilcars = SD*oilcars
* Generate variable for fuel price distance interaction
gen PdDist = Pdiesel*CHIdist
** Models 
* Look at effects of distance
reghdfe spread oilcars CHIdist dist_sq Pdiesel tmin TotTraffic i.month#c.soy, vce(cluster marketid date) absorb(constant)
eststo SoyDist1
* State intereactions
egen state_id = group(state)
* This one gets's the SE's right
reghdfe spread oilcars MNoilcars MToilcars NDoilcars NEoilcars SDoilcars PdDist tmin TotTraffic, vce(cluster marketid date) absorb(i.marketid i.month#c.soy) 
eststo SoyDist3
** Make table 
#delimit;
estout SoyDist1 SoyDist3
using "Tables/SoyDistSpread.txt", order(oilcars CHIdist dist_sq carsXdistance PdDist  _IstaXoilca_2 _IstaXoilca_3 _IstaXoilca_4  _IstaXoilca_5  _IstaXoilca_6)
cells(b(star fmt(%9.3f)) se(par fmt(%9.3f)) ) style(tab)  stats(N p r2_a, fmt(%8.0f %9.3f)) 
starlevels(* 0.1 ** 0.05 *** 0.01) varlabels(_cons Constant) replace drop();
#delimit cr

***** Appendix Table 4 - Quantities of wheat shipped to the west coast *****
*** Regressions of quantities interacted with West dummy
use "Data/WheatinCropTimeWestEast.dta", clear
* Total traffic in thousand carloads
replace TotTraffic = TotTraffic/1000
egen state_id = group(O_ST)
*** Make the state interactions by hand so reghdfe reports coeff. rel. to MN
gen MN = O_ST == "MN"
gen MT = O_ST == "MT"
gen ND = O_ST == "ND"
gen NE = O_ST == "NE"
gen SD = O_ST == "SD"
gen MNoilcars = MN*oilcars_stb
gen MToilcars = MT*oilcars_stb
gen NDoilcars = ND*oilcars_stb
gen NEoilcars = NE*oilcars_stb
gen SDoilcars = SD*oilcars_stb
gen MNloilcars = MN*loilcars
gen MTloilcars = MT*loilcars
gen NDloilcars = ND*loilcars
gen NEloilcars = NE*loilcars
gen SDloilcars = SD*loilcars
** Cars Regressions
* Look at differences across states
gen MNWestloilcars = MN*loilcars*West
reghdfe lcars loilcars MNWestloilcars i.West#c.MTloilcars i.West#c.NDloilcars i.West#c.SDloilcars lpdiesel tmin lTotTraff, vce(cluster O_FIPS cropdate) absorb(i.m#c.lproduction i.O_FIPS)
xi: areg lcars loilcars MNWestloilcars i.West|MTloilcars i.West|NDloilcars i.West|SDloilcars lpdiesel tmin lTotTraff i.m|lproduction if _fillin == 0, absorb(O_FIPS)
eststo wheatEW
*** Make quantity west table 
#delimit;
estout wheatEW
using "Tables/QWEST.txt", order() 
drop( _Im* _ImXlpro_* lproduction )
cells(b(star fmt(%9.3f)) se(par fmt(%9.3f)) ) style(tab)  stats(N p r2_a, fmt(%8.0f %9.3f))  
starlevels(* 0.1 ** 0.05 *** 0.01) varlabels(_cons Constant) replace;
#delimit cr

***** Appendix Table 5 - Shipments to the west coast *****
*** West coast destinations
use  "Data/WestProbits.dta", clear
table O_ST year if STCC5 == 1137, c(mean West)
tab T_ST if West == 1
* Convert distance to miles
replace MNdist = MNdist/100
* Cars by distance interaction
gen dist_sq = MNdist^2
gen loilcarsXdistance = oilcars*MNdist
gen loilcarsXdistance_sq = oilcars*MNdist^2
* Make distance variables
sum MNdist
gen distcat = .
foreach c in 1 2 3 4 5 {
replace distcat = `c' if MNdist >= (`c'-1)*2 & MNdist < (`c'*2)
}
replace distcat = 5 if MNdist >= 10
gen d1 = distcat == 1
gen d2 = distcat == 2
gen d3 = distcat == 3
gen d4 = distcat == 4
gen d5 = distcat == 5
gen d1loilcars =d1*loilcars
gen d2loilcars =d2*loilcars
gen d3loilcars =d3*loilcars
gen d4loilcars =d4*loilcars
gen d5loilcars =d5*loilcars
** Make constant so that we can absorb and use regdhfe
gen byte constant = 1
egen state_id = group(O_ST)
*** Make the state interactions by hand so reghdfe reports coeff. rel. to MN
gen MN = O_ST == "MN"
gen MT = O_ST == "MT"
gen ND = O_ST == "ND"
gen NE = O_ST == "NE"
gen SD = O_ST == "SD"
gen MNloilcars = MN*loilcars
gen MTloilcars = MT*loilcars
gen NDloilcars = ND*loilcars
gen NEloilcars = NE*loilcars
gen SDloilcars = SD*loilcars
** West Coast destinations
reghdfe West loilcars loilcarsXdistance lpdiesel lTotTraff tmin, vce(cluster O_FIPS cropdate) absorb(i.m#c.lproduction i.O_FIPS)
eststo wheat5
* Add quad term
reghdfe West loilcars loilcarsXdistance loilcarsXdistance_sq lpdiesel lTotTraff tmin, vce(cluster O_FIPS cropdate) absorb(i.m#c.lproduction i.O_FIPS)
eststo wheat6
* State effects
reghdfe West loilcars MTloilcars NDloilcars SDloilcars lpdiesel lTotTraff tmin, vce(cluster O_FIPS cropdate) absorb(i.m#c.lproduction i.O_FIPS)
eststo wheat7
* Distance categories
reghdfe West loilcars d2loilcars d3loilcars d4loilcars d5loilcars lpdiesel lTotTraff tmin, vce(cluster O_FIPS cropdate) absorb(i.m#c.lproduction i.O_FIPS)
eststo wheat8
* Probit with distance categories
xi: probit2 West loilcars d2loilcars d3loilcars d4loilcars d5loilcars lpdiesel lTotTraff tmin i.m|lproduction i.O_FIPS, fcluster(O_FIPS) tcluster(cropdate)
eststo wheat9
xi: probit West loilcars d2loilcars d3loilcars d4loilcars d5loilcars lpdiesel lTotTraff tmin i.m|lproduction i.O_FIPS, 
margins , dydx(*)
*** Make west table 
#delimit;
estout wheat5 wheat6 wheat7 wheat8 wheat9
using "Tables/WEST.txt", order(loilcars loilcarsXdistance loilcarsXdistance_sq _IO_SXloilc_* _IdisXloilc_*) 
drop(_IO_FIPS_* _Im* _ImXlpro_* lproduction o._IO_FIPS_*)
cells(b(star fmt(%9.3f)) se(par fmt(%9.3f)) ) style(tab)  stats(N p r2_a, fmt(%8.0f %9.3f))  
starlevels(* 0.1 ** 0.05 *** 0.01) varlabels(_cons Constant) replace;
#delimit cr



 
